<?php

require_once "common.php";

// Group label written into the last CSV column of every contact exported by this run.
define('CONTACTS_GROUP',date('Y年m月d日H:i 导入数据'));
_write_html_frame('获取58数据');
_Log("开始抓取58的数据",'note');
// Listing URL prefix; the page number is appended to it.
$pn_url ='http://cd.58.com/chuzu/1/pn';
// Referer passed with each listing request: the previously requested listing URL (empty for page 1).
$referer ='';
// Walk listing pages 1..999. For each page, collect the detail links, then visit
// every detail page and export the phone number found there.
for($i=1;$i<1000;$i++){
    _Log("第{$i}页数据获取开始！",'note');
    $url = $pn_url.$i;
    _Log("发起请求到：{$url}",'note');
    $content = _request($url,$referer);
    $referer = $url;
    if(!$content){
        _Log("第{$i}页数据获取失败！",'error');
        continue;
    }
    _Log("第{$i}页数据获取成功！",'success');
    _Log("开始解析第{$i}页数据！",'note');
    phpQuery::newDocumentHTML($content);
    $detail_pages = pq('.img_list a');
    $detail_pages_size = $detail_pages->size();
    if($detail_pages_size<1){
        // No detail links matched the selector: this is a failure, so log it at the
        // 'error' level. The loops below simply do nothing for an empty list.
        _Log("第{$i}页数据解析失败！",'error');
    }
    // Copy every href out of the listing document up front. The detail loop below
    // creates a new phpQuery document per detail page, and pq() without an explicit
    // context is then evaluated against that newer document.
    $detail_urls = array();
    for($j=0;$j<$detail_pages_size;$j++){
        $detail_urls[] = $detail_pages->get($j)->getAttribute('href');
    }

    foreach($detail_urls as $j=>$detail_url){
        _Log("开始获取{$i}-{$j}详细页内容！",'note');
        // The listing page URL is passed as the referer for its detail pages.
        $detail_content = _request($detail_url,$url);
        if(!$detail_content){
            _Log("{$i}-{$j}详细页内容获取失败！",'error');
            continue;
        }
        _Log("详细页{$i}-{$j}内容获取成功！",'success');
        phpQuery::newDocumentHTML($detail_content);
        $tel = pq('span.tel-num')->text();
        $title = pq('title')->text();
        // Strip all whitespace from the page title before using it in log messages.
        $title = preg_replace('/\s*/','',$title);
        _Log("开始抓取[{$title}]的联系信息",'note');
        // Strip all whitespace from the phone number before validating it.
        $tel = preg_replace('/\s*/','',$tel);
        if(isTel($tel)){
            // The contact name is read from the <span> following the phone number element.
            $name =pq('span.tel-num')->next('span')->text();
            _Log("[{$title}]的联系信息获取成功！",'success');
            _Log('抓取到号码：['.$name.']'. $tel,'success');
            write_link_to_xml($name,$tel);
        }else{
            _Log("[{$title}]的联系信息获取失败！",'error');
        }

        unset($detail_content);
        // usleep() takes microseconds, so this pauses for only 0.1-1 ms.
        // NOTE(review): if a millisecond-scale delay was intended, the bounds need scaling by 1000 - confirm.
        usleep(rand(100,1000));
    }

    // Release the per-page data before moving on to the next listing page.
    unset($detail_pages);
    unset($detail_urls);
    unset($content);
    // Same microsecond-scale pause as between detail pages (see the note above).
    usleep(rand(100,1000));
    _Log("第{$i}页数据获取结束！",'note');
}


/**
 * Append one contact as a CSV row to the current export file, unless has_tel()
 * reports that the phone number was already recorded.
 *
 * Rows are written to data/58.<NOW_DATE>.contacts.<n>.csv next to this script.
 * A header row is written when a file is started, and a new file (n + 1) is
 * started once the per-file counter reaches 1000. Embedded double quotes in the
 * fields are doubled so the quoted CSV fields stay well-formed.
 *
 * NOTE(review): the header is written without FILE_APPEND, so a file that already
 * exists under the same name is truncated when this process starts writing to it -
 * confirm that this is acceptable for repeated runs.
 *
 * @param string $name Contact name as scraped from the page.
 * @param string $tel  Phone number; also the de-duplication key passed to has_tel().
 * @return void
 */
function write_link_to_xml($name,$tel){
    // Row counter within the current file and the current file number; both persist across calls.
    static $index = 1,$fIndex = 1;
    if(has_tel($tel)){
        return;
    }
    if($index>=1000){
        $fIndex ++;
        $index = 1;
    }
    $dir = dirname(__FILE__).'/data/';
    // Create the export directory here instead of relying on another function having created it.
    if(!is_dir($dir)){
        mkdir($dir,0777,true);
    }
    $file = $dir.'58.'.NOW_DATE.'.contacts.'.$fIndex.'.csv';
    if($index ==1){
        // First row of a file: write the header line.
        file_put_contents($file,"\"姓\",\"名\",\"前缀\",\"后缀\",\"其他电话\",\"qq同步助手 分组\",\n");
    }
    // Double any embedded quote so it cannot terminate the quoted CSV field early.
    $name = str_replace('"','""',$name);
    $tel = str_replace('"','""',$tel);
    $group = str_replace('"','""',CONTACTS_GROUP);
    file_put_contents($file,"\"$name\",\"\",\"\",\"\",\"{$tel}\",\"{$group}\",\n",FILE_APPEND);
    $index++;
}


/**
 * Report whether a phone number has already been recorded, recording it if not.
 *
 * Each recorded number is represented by a marker file named after the number
 * inside data/repeat/<NOW_DATE>/ next to this script; the directory is created
 * on demand.
 *
 * @param string $tel Phone number; used verbatim as the marker file name and content.
 * @return bool True when a marker for $tel already existed, false when it was just created.
 */
function has_tel($tel){
    $marker_dir = dirname(__FILE__).'/data/repeat/'.NOW_DATE.'/';
    if(!is_dir($marker_dir)){
        mkdir($marker_dir,0777,true);
    }
    $marker = $marker_dir.$tel;
    $already_seen = is_file($marker);
    if(!$already_seen){
        // First sighting: leave a marker so later calls report this number as a duplicate.
        file_put_contents($marker,$tel);
    }
    return $already_seen;
}